clear
*cd "directory"

// calculate aggregate sample means 
insheet using aggregate_npsas_raw.csv, clear

*adjust for inflation 
merge m:1 year using cpi, keep(matched ) nogen
foreach var of varlist tot_cost-plus_amt tuit_minus_instgr instgr cpi_tuition {
replace `var'=`var'*258.85/cpi_all 
}

gen double count = tot_grant_wss  
*consistently define key variables: 
*total loans to include PLUS
replace tot_ln_inc=tot_ln_exc if tot_ln_inc ==.
*finantial aid net of loans 
gen fin_aid=tot_aid-tot_ln_inc
*net price as total sticker tuition minus aid 
gen net_price=tot_tuition-fin_aid
*net cost as cost of attendence minus aid 
replace net_cost=tot_cost-fin_aid 
*living expenses 
gen liv_exp=tot_cost-tot_tuition 
recode va_dod .=0
recode plus_amt .=0 

collapse (mean) avg_net_tuit = net_tuition avg_tot_tuition=tot_tuition avg_net_cost = net_cost avg_liv_exp = liv_exp avg_bor = tot_ln_inc avg_va_dod=va_dod avg_fin_aid = fin_aid avg_tot_grant=tot_grant  avg_net_price=net_price avg_tot_ln_inc=tot_ln_inc (rawsum) avg_enrollment=count [aw = count], by( year)

tempfile tot_avg
save `tot_avg'


*aggregate means by degree and year 
insheet using deg_type_npsas_raw.csv, clear

*adjust for inflation 
merge m:1 year using cpi, keep(matched ) nogen
foreach var of varlist tot_cost-plus_amt tuit_minus_instgr instgr cpi_tuition {
replace `var'=`var'*258.85/cpi_all 
}

gen double count = tot_grant_wss  
*consistently define key variables: 
*total loans to include PLUS
replace tot_ln_inc=tot_ln_exc if tot_ln_inc ==.
*finantial aid net of loans 
gen fin_aid=tot_aid-tot_ln_inc
*net price as total sticker tuition minus aid 
gen net_price=tot_tuition-fin_aid
*net cost as cost of attendence minus aid 
replace net_cost=tot_cost-fin_aid 
*living expenses 
gen liv_exp=tot_cost-tot_tuition 
recode va_dod .=0
recode plus_amt .=0 

collapse (mean) avg_net_tuit = net_tuition avg_tot_tuition=tot_tuition avg_net_cost = net_cost avg_liv_exp = liv_exp avg_bor = tot_ln_inc avg_va_dod=va_dod avg_fin_aid = fin_aid avg_tot_grant=tot_grant  avg_net_price=net_price avg_tot_ln_inc=tot_ln_inc [aw = count], by(deg_type year)

tempfile tot_deg_avg
save `tot_deg_avg'



// master data set to construct price index 
insheet using detailed_npsas_raw.csv, clear
*adjust for inflation 
merge m:1 year using cpi, keep(matched using ) nogen
foreach var of varlist tot_cost-plus_amt tuit_minus_instgr instgr cpi_tuition {
replace `var'=`var'*258.85/cpi_all 
}

egen group=group(sector deg_type gender depend same liv ), missing 

gen double count = tot_grant_wss  
*consistently define key variables: 
*total loans to include PLUS
replace tot_ln_inc=tot_ln_exc if tot_ln_inc ==.
*finantial aid net of loans 
gen fin_aid=tot_aid-tot_ln_inc
*net price as total sticker tuition minus aid 
gen net_price=tot_tuition-fin_aid
*net cost as cost of attendence minus aid 
replace net_cost=tot_cost-fin_aid 
*living expenses 
gen liv_exp=tot_cost-tot_tuition 
recode va_dod .=0
recode plus_amt .=0 

*exclude 1990 data because many variables seem incomparable to post 1990 data 
drop if year==1990
save master,replace



*** Calculate the laspeyeres nums and denoms for each of the 4 files to form aggregate index 
use master, clear
drop if deg_type==""
gen time=1 if year==1990
replace time=2 if year==1993
replace time=3 if year==1996
replace time=4 if year==2000
replace time=5 if year==2004
replace time=6 if year==2008
replace time=7 if year==2012
replace time=8 if year==2016
replace time=9 if year==2020
tsset group time

*Laspeyers numerator and denominator 
gen double sticker_num = (tot_tuition * l.count)
gen double sticker_den = (l.tot_tuition * l.count)

gen double net_tuition_num = (net_tuition * l.count)
gen double net_tuition_den = (l.net_tuition * l.count)

gen double net_price_num = (net_price * l.count)
gen double net_price_den = (l.net_price * l.count)

gen double fin_aid_num = (fin_aid * l.count)
gen double fin_aid_den = (l.fin_aid * l.count)

gen double net_cost_num = (net_cost * l.count)
gen double net_cost_den = (l.net_cost * l.count)

gen double liv_exp_num = (liv_exp * l.count)
gen double liv_exp_den = (l.liv_exp * l.count)

gen double tot_ln_inc_num = (tot_ln_inc * l.count)
gen double tot_ln_inc_den = (l.tot_ln_inc * l.count)
renam tot_tuition sticker 

collapse (rawsum) *_num *_den enrollment = count (mean) sticker  net_tuition net_cost liv_exp fin_aid net_price tot_ln_inc  [aw=count], by(year)

foreach var of varlist sticker net_tuition net_cost liv_exp fin_aid  net_price tot_ln_inc {
	gen z=`var'_num/`var'_den 
	recode z .=1
	sort year
	gen i_`var'=1
	replace i_`var'=i_`var'[_n-1]*z if i_`var'[_n-1]~=.
	drop z
}

drop *_num *_den 

// Merge with cpi and raw averages
merge m:1 year using cpi, keep(matched using ) nogen
merge 1:1 year using `tot_avg', keep(matched master) nogen 

*aotc is total tax expenditure (Treasury) divided by total postsecondary enrollment (NCES)
gen aotc=.
replace aotc=(4.2+2.4)/15.7 if year==2000
replace aotc=(3.3+2.2)/17.7 if year==2004
replace aotc=(3.8+2.5)/19.6 if year==2008
replace aotc=(15.6+2+5.9)/20.9 if year==2012
replace aotc=(15.5+4.6)/21.2 if year==2016
replace aotc=(16.4+4.0)/19.4 if year==2020
recode aotc .=0
replace aotc=aotc*258.85/cpi_all*1000 


*compare to CPI tuition and fees on same inflation adjusted basis: 
replace cpi_tuition=cpi_tuition*258.85/cpi_all 
sum cpi_tuition if year==1993, meanonly 
gen cpi_tuition_graph = cpi_tuition/r(mean)
sum sticker if year == 1993, meanonly
replace cpi_tuition_graph = cpi_tuition_graph*r(mean)

foreach var of varlist sticker net_tuition net_cost liv_exp fin_aid net_price tot_ln_inc {
	
	sum `var' if year==1993, meanonly 
	gen `var'_graph=i_`var'*r(mean)
} 

gen tax_tuition_graph=net_price_graph-aotc

*** Plot #1: Inflation comparison tuition with CPI and College board
twoway line cpi_tuition_graph sticker_graph net_price_graph tax_tuition_graph year if year>1992,  ///
    lpattern(solid dash dot longdash) lcolor(black black black black) ///
    xlabel(1993(5)2020) ///
    title("Inflation in Net Tuition and Fees") ///
    legend(order(1 "CPI Tuition and Fees" 2 "Sticker Price" 3 "Net-of-Aid Tuition Paid" 4 "Net Tuition - Tax Benefits") pos(6) r(2)) ///
    ytitle("Tuition and fees (2020 $)") xtitle("Year") ///
    note("Sources: U.S. Bureau of Labor Statistics, National Postsecondary Student Aid Survey." "NPSAS sample of U.S. students pursuing AA, BA, and graduate degrees. Financial aid excludes loans." "Tax credits include AOTC, HOPE, LLC.")
graph export "..\visuals\bw_1_inflation.png", as(png) replace
	


**********************************************************

*INDEX BY DEGREE LEVEL 

use master, clear
drop if deg_type==""
gen time=1 if year==1990
replace time=2 if year==1993
replace time=3 if year==1996
replace time=4 if year==2000
replace time=5 if year==2004
replace time=6 if year==2008
replace time=7 if year==2012
replace time=8 if year==2016
replace time=9 if year==2020
tsset group time

gen double sticker_num = (tot_tuition * l.count)
gen double sticker_den = (l.tot_tuition * l.count)

gen double net_tuition_num = (net_tuition * l.count)
gen double net_tuition_den = (l.net_tuition * l.count)

gen double net_price_num = (net_price * l.count)
gen double net_price_den = (l.net_price * l.count)

gen double fin_aid_num = (fin_aid * l.count)
gen double fin_aid_den = (l.fin_aid * l.count)

gen double net_cost_num = (net_cost * l.count)
gen double net_cost_den = (l.net_cost * l.count)

gen double liv_exp_num = (liv_exp * l.count)
gen double liv_exp_den = (l.liv_exp * l.count)

gen double tot_ln_inc_num = (tot_ln_inc * l.count)
gen double tot_ln_inc_den = (l.tot_ln_inc * l.count)
renam tot_tuition sticker 
collapse (rawsum) *_num *_den enrollment = count (mean) sticker  net_tuition net_cost liv_exp fin_aid  net_price tot_ln_inc  [aw=count], by(deg_type year)

foreach var of varlist sticker net_tuition net_cost liv_exp fin_aid  net_price tot_ln_inc {
	gen z=`var'_num/`var'_den 
	recode z .=1
	sort deg_type year
	gen i_`var'=1
		by deg_type: replace i_`var'=i_`var'[_n-1]*z if i_`var'[_n-1]~=.
	drop z
}

drop *_num *_den 

// Merge with cpi and raw averages
merge m:1 year using cpi, keep(matched) nogen
merge 1:1 deg_type year using `tot_deg_avg', keep(matched master) nogen 

egen id=group(deg_type)
tab id deg_type
keep sticker net_tuition net_price liv_exp tot_ln_inc avg* i_* year  id
reshape wide sticker net_tuition net_price liv_exp tot_ln_inc avg* i_*, i(year) j(id)

foreach var of varlist sticker* net_tuition* net_price* liv_exp* tot_ln_inc*  {
	sum `var' if year==1993, meanonly 
	gen `var'_graph=i_`var'*r(mean)
} 

*figure 3: inflatino by degree 
twoway line i_net_price1 i_net_price2 i_net_price3 i_net_price4  year if year>1992, lcolor(navy blue red green orange yellow) 	xlabel(1993(5)2020)	 ///
title("Net-of-Aid Tuition Inflation by Degree Level") 							///
legend(order(1 "Asociate's" 2 "Bachelor's" 3 "Master's" 4 "Professional" ) pos(6) r(1))  	///
    lpattern(dot solid dash longdash) lcolor(black black black black)  ///
ytitle("Index") xtitle("Year") 									///
note("Source: National Postsecondary Student Aid Survey. Aid includes grants, workstudy, Veterans and DoD benefits," "and gradauate assistantships. Excludes tax benefits.")
graph export "..\visuals\bw_3_inflation_by_degree.png", as(png) replace

*plot 5A loans by degree 
graph twoway line avg_bor1 avg_bor2 avg_bor3 avg_bor4 avg_bor5 year  if year>1992, lcolor(navy blue red green orange yellow)  xlabel(1993(5)2020) yaxis(1) ytitle("Loan amount ($)") title("Annual Borrowing By Degree Level") legend(order(1 "AA" 2 "BA" 3 "MA" 4  "Professional" 5 "PhD") position(6) r(1)) xtitle("Year")  ///
    lpattern(dot solid dash longdash dash_dot) lcolor(black black black black black)  ///
            note("Average total Loans, including PLUS, across all students. Source: National Postsecondary Student Aid Survey.")
graph export "..\visuals\bw_5a_loans_by_degree.png", as(png) replace


*** Plot 8: Living expenses  by degree level
*living expenses for grad students incomparable in 1993
replace liv_exp3_graph=. if year==1993
replace liv_exp4_graph=. if year==1993
replace liv_exp5_graph=. if year==1993

twoway line liv_exp1_graph liv_exp2_graph liv_exp3_graph liv_exp4_graph liv_exp5_graph  year if year>1992,  ///
    lpattern(dot solid dash longdash dash_dot) lcolor(black black black black black)  ///
xlabel(1993(5)2020)													///
title("Living Expenses by Degree Level") 									///
subtitle("Inflation adjusted (2020 dollars)") 									///
legend(order(1 "Associate's" 2 "Bachelor's"  3 "Master's" 4 "Professional" 5 "PhD") pos(6) r(1))  	///
ytitle("Living Expense ($)") xtitle("Year") 									///
note("Sources: National Postsecondary Student Aid Survey.")			
graph export "..\visuals\bw_8_living_expenses_degree.png", as(png) replace


			
******************************************
*by sector  and degree 
use master, clear
drop if deg_type==""
drop if deg_type=="phd-r"
gen alt_sector=.
replace alt_sector=1 if sector=="pub" & deg_type=="ba"
replace alt_sector=2 if sector=="pub" & deg_type=="ma"
replace alt_sector=3 if sector=="pub" & deg_type=="phd-p"
replace alt_sector=4 if sector=="pnp" & deg_type=="ba"
replace alt_sector=5 if sector=="pnp" & deg_type=="ma"
replace alt_sector=6 if sector=="pnp" & deg_type=="phd-p"
drop if alt_sector==.

gen time=1 if year==1990
replace time=2 if year==1993
replace time=3 if year==1996
replace time=4 if year==2000
replace time=5 if year==2004
replace time=6 if year==2008
replace time=7 if year==2012
replace time=8 if year==2016
replace time=9 if year==2020
tsset group time

gen double sticker_num = (tot_tuition * l.count)
gen double sticker_den = (l.tot_tuition * l.count)

gen double net_tuition_num = (net_tuition * l.count)
gen double net_tuition_den = (l.net_tuition * l.count)

gen double net_price_num = (net_price * l.count)
gen double net_price_den = (l.net_price * l.count)

gen double fin_aid_num = (fin_aid * l.count)
gen double fin_aid_den = (l.fin_aid * l.count)

gen double net_cost_num = (net_cost * l.count)
gen double net_cost_den = (l.net_cost * l.count)

gen double liv_exp_num = (liv_exp * l.count)
gen double liv_exp_den = (l.liv_exp * l.count)

gen double tot_ln_inc_num = (tot_ln_inc * l.count)
gen double tot_ln_inc_den = (l.tot_ln_inc * l.count)
renam tot_tuition sticker 
collapse (rawsum) *_num *_den enrollment = count (mean) sticker  net_tuition net_cost liv_exp fin_aid  net_price tot_ln_inc  [aw=count], by(alt_sector year)

renam alt_sector sector 

foreach var of varlist sticker net_tuition net_cost liv_exp fin_aid  net_price tot_ln_inc {
	gen z=`var'_num/`var'_den 
	recode z .=1
	sort sector year
	gen i_`var'=1
		by sector: replace i_`var'=i_`var'[_n-1]*z if i_`var'[_n-1]~=.
	drop z
}

drop *_num *_den 

egen id=group(sector)
tab id sector
keep sticker net_tuition net_price liv_exp tot_ln_inc i_* year  id
reshape wide sticker net_tuition net_price liv_exp tot_ln_inc i_*, i(year) j(id)

foreach var of varlist sticker* net_tuition* net_price* liv_exp* tot_ln_inc*  {
	sum `var' if year==1993, meanonly 
	gen `var'_graph=i_`var'*r(mean)
} 

*figure 4a and 4b
twoway line i_net_price* year if year>1992, lpattern(solid dash longdash solid dash longdash ) lcolor(black black black grey grey grey) lw(thin thin thin medthick medthick medthick)  	xlabel(1993(5)2020)	 ///
title("Net-of-Aid Tuition Inflation by Sector and Degree") 											///
legend(order(1 "Public BA" 2 "Public MA" 3 "Public Professional" 4 "Nonprofit BA" 5 "Nonprofit MA" 6 "Nonprofit Professional"  ) pos(6) r(2))  	///
ytitle("Index") xtitle("Year") 									///
note("Source: National Postsecondary Student Aid Survey. Aid includes grants, workstudy, Veterans and DoD benefits," "and gradauate assistantships. Excludes tax benefits.")			
graph export "..\visuals\bw_4A_inflation_by_sectordegree.png", as(png) replace

twoway line net_price1 net_price2 net_price3  net_price4 net_price5 net_price6  year if year>1992, lpattern(solid dash longdash solid dash longdash ) lcolor(black black black grey grey grey) lw(thin thin thin medthick medthick medthick) 	xlabel(1993(5)2020)	 ///
title("Net-of-Aid Tuition by Sector and Degree") 											///
legend(order(1 "Public BA" 2 "Public MA" 3 "Public Professional" 4 "Nonprofit BA" 5 "Nonprofit MA" 6 "Nonprofit Professional"  ) pos(6) r(2))  	///
ytitle("Net Tuition and fees (2020 $)") xtitle("Year") 									///    
note("Source: National Postsecondary Student Aid Survey. Aid includes grants, workstudy, Veterans and DoD benefits," "and gradauate assistantships. Excludes tax benefits.")			
graph export "..\visuals\bw_4B_levels_by_sectordegree.png", as(png) replace

*borrowing 
twoway line tot_ln_inc1 tot_ln_inc2 tot_ln_inc3 tot_ln_inc4 tot_ln_inc5 tot_ln_inc6 year if year>1992, ///
xlabel(1993(5)2020)	lpattern(solid dash longdash solid dash longdash ) lcolor(black black black grey grey grey) lw(thin thin thin medthick medthick medthick) 	///
title("Annual Borrowing by Sector and Degree") 									///
legend(order(1 "Public BA" 2 "Public MA" 3 "Public Professional" 4 "Nonprofit BA" 5 "Nonprofit MA" 6 "Nonprofit Professional"  ) pos(6) r(2))  	///
 ytitle("Annual Borrowing ($)") xtitle("Year") 									///
note("Sources: National Postsecondary Student Aid Survey. ")			
graph export "..\visuals\bw_5B_loans_by_degree_sector.png", as(png) replace




*****************************************************			
*AGGREGATE SPENDING GRAPH 

insheet using aggregate_npsas_raw.csv, clear
*adjust for inflation 
merge m:1 year using cpi, keep(matched ) nogen
foreach var of varlist tot_cost-plus_amt tuit_minus_instgr instgr cpi_tuition {
replace `var'=`var'*258.85/cpi_all 
}

gen double count = tot_grant_wss  
*consistently define key variables: 
*total loans to include PLUS
replace tot_ln_inc=tot_ln_exc if tot_ln_inc ==.
*finantial aid net of loans 
gen fin_aid=tot_aid-tot_ln_inc
*net price as total sticker tuition minus aid 
gen net_price=tot_tuition-fin_aid
*net cost as cost of attendence minus aid 
replace net_cost=tot_cost-fin_aid 
*living expenses 
gen liv_exp=tot_cost-tot_tuition 
recode va_dod .=0
recode plus_amt .=0 

collapse (sum) tot_cost-plus_amt tuit_minus_instgr instgr net_price liv_exp fin_aid  (rawsum) enrollment=count [fw=count] ,by(year)

foreach var of varlist tot_cost-plus_amt tuit_minus_instgr instgr net_price liv_exp fin_aid {
	replace `var'=`var'/1000000000
}

*BEA PCE education data--nominal spending 
merge 1:1 year using pce_education, nogen 
merge 1:1 year using cpi, nogen
replace pce_education=pce_education*258.85/cpi_all 
drop if year<1993|year>2020

gen aotc=.
replace aotc=(4.2+2.4)/15.7 if year==2000
replace aotc=(3.3+2.2)/17.7 if year==2004
replace aotc=(3.8+2.5)/19.6 if year==2008
replace aotc=(15.6+2+5.9)/20.9 if year==2012
replace aotc=(15.5+4.6)/21.2 if year==2016
replace aotc=(16.4+4.0)/19.4 if year==2020
recode aotc .=0
replace aotc=aotc*258.85/cpi_all 

*assume inst grants same fraction of total grants prior to 2000 (46%) when not reported 
replace tuit_minus_instgr=tot_tuition-tot_grant*.46 if year<2000

*per capita spending 
foreach var of varlist tot_tuition tuit_minus_instgr net_tuition net_price tot_ln_inc pce_education va_dod tot_grant tot_aid fin_aid fed_wrk {
	replace `var'=`var'/enrollment*1000000
}

gen area_tax=net_price-aotc

twoway (area tot_tuition tuit_minus_instgr net_tuition net_price area_tax year,  color(gs0 gs2 gs4 gs6 gs8)) ///
       (line tot_ln_inc year, lpattern(dash) lcolor(black)) if year>1992, ///
    xlabel(1993(5)2020) ///
    title("Sources of Tuition Funding") ///
    subtitle("Per Capita Tuition, Financial Aid, and Loans") ///
    legend(order(1 "Institutional Aid" 2 "Grants" 3 "Other Aid" 4 "Tax Benefits" 5 "Students/Families" 6 "Total Loans") pos(6) r(2)) ///
    ytitle("Thousands of 2020 Dollars") xtitle("Year") ylabel(0(2)12) ///
    note("Sources: National Postsecondary Student Aid Survey.")
graph export "..\visuals\bw_2_how_tuition_paid.png", as(png) replace

********************************************

*graphs by parent income quartile 			
insheet using parent_income_npsas_raw.csv,clear 		
drop if year==1990 

*adjust for inflation 
merge m:1 year using cpi, keep(matched using ) nogen
foreach var of varlist tot_cost-plus_amt {
replace `var'=`var'*258.85/cpi_all 
}

*consistently define key variables: 
*total loans to include PLUS
replace tot_ln_inc=tot_ln_exc if tot_ln_inc ==.
*finantial aid net of loans 
gen fin_aid=tot_aid-tot_ln_inc
*net price as total sticker tuition minus aid 
gen net_price=tot_tuition-fin_aid
*net cost as cost of attendence minus aid 
replace net_cost=tot_cost-fin_aid 
gen double count = tot_grant_wss  

collapse (mean) tot_tuition  net_tuition net_cost  fin_aid net_price tot_ln_inc  [aw=count], by(paren year)

egen id=group(paren_incq)
keep tot_tuition net_tuition net_price  tot_ln_inc year  id
reshape wide tot_tuition net_tuition net_price  tot_ln_inc  , i(year) j(id)

twoway line net_price1 net_price2 net_price3 net_price4  year if year>1992, lpattern(solid dash dash_dot longdash ) lcolor(black black black black black)	xlabel(1993(5)2020)	 ///
title("Net Tuition Paid by Parent Income Quartile") 													///
subtitle("Inflation adjusted (2020 dollars)")													///
legend(order(1 "1st Quartile" 2 "2nd Quartile" 3 "3rd Quartile" 4 "4th Quartile") pos(6) r(1)) 	///
ytitle("Tuition and fees ($)") xtitle("Year")													///
note("Dependent Bachelor's students at 4-year public and private nonprofit institutions." "Source: National Postsecondary Student Aid Survey. Excludes tax benefits.")
graph export "..\visuals\bw_6_netprice_parent_quartile.png", as(png) replace


twoway line tot_ln_inc1 tot_ln_inc2 tot_ln_inc3 tot_ln_inc4 year if year>1992, lpattern(solid dash dash_dot longdash ) lcolor(black black black black black)	xlabel(1993(5)2020)	  					///
title("Annual Total Borrowing (including PLUS) by Parent Income Quartile") 								///
legend(order(1 "1st Quartile" 2 "2nd Quartile" 3 "3rd Quartile" 4 "4th Quartile") pos(6) r(1)) 	///
ytitle("Loan amount (2020 $)") xtitle("Year")														///
note("Dependent Bachelor's degree students at 4-year public and private nonprofit institutions." "Source: National Postsecondary Student Aid Survey.")
graph export "..\visuals\bw_7_loans_by_parent.png", as(png) replace 


			